library(tidyverse) # For data cleaning
library(haven) # To read .sav files
library(psych) # For Cronbach's alpha

# Min-max normalization: rescale a numeric vector to the 0-1 range.
#   x      numeric vector
#   na.rm  should missing values be ignored when the minimum and maximum are
#          computed? Defaults to TRUE. With na.rm = FALSE any NA in x makes
#          the whole result NA.
# Returns a numeric vector of the same length as x. NA elements of x stay NA.
# If all non-missing values are equal the range is 0 and the result is NaN.
normalize <- function(x, na.rm = TRUE) {
  rng <- range(x, na.rm = na.rm)
  (x - rng[1]) / (rng[2] - rng[1])
}

#### Cleaning citizen dataset ####
# Codebook: https://services.fsd.tuni.fi/catalogue/FSD3875?study_language=fi&tab=variables

# Build the cleaned citizen data set: restrict the sample, rescale the weight
# and derive the party, political-experience, ideology, satisfaction,
# illiberalism and demographic variables.
cit <- read_sav("daF3875_fin.sav") %>%
  # Turn every labelled column into a factor so values can be matched by label
  mutate(across(where(is.labelled), ~ as_factor(.x))) %>%
  # Will only use version 2 of the study due to question availability
  filter(survey == "2") %>%
  # Candidate data does not have Åland candidates, exclude Åland citizens
  # (respondents with a missing election district are kept)
  filter(VOTE_CAND_DIST != "Ahvenanmaan maakunnan vaalipiiri" | is.na(VOTE_CAND_DIST)) %>%
  # Remove those who did not give their election district, but voted for an Åland party
  filter(!grepl("ÅL", PCHOICE2023_OTHER)) %>%
  filter(!grepl("LÖFSTRÖM", PCHOICE2023_OTHER)) %>%
  filter(!grepl("OBUNDEN", PCHOICE2023_OTHER)) %>%
  # Since we have excluded people from Åland, our weight might be skewed ->
  # Rescale the weight so that it averages 1 in the remaining sample
  mutate(rake_weights = rake_weights2 / mean(rake_weights2)) %>%
  # Select relevant variables
  select(DEMSATISF, FINVALUES1:FINVALUES14, K6_1, K6_3, K6_4, K6_8, K6_9, PCHOICE2023, PCHOICE2023_OTHER,
         LEFTRIGHT_SELF, LIBCONS_SELF, BYEAR, GENDER, EDU, MUNSIZE, PARTYMEMB,
         rake_weights2, rake_weights, survey) %>%
  ## Recode party choice in the parliamentary election 2023
  # Party choice - 10 levels
  # Levels 10 and 11 of PCHOICE2023 are merged into "Muu" (other)
  mutate(party_10 = fct_collapse(PCHOICE2023,
                                 "Muu" = levels(PCHOICE2023)[10:11])) %>%
  # Every level not listed in `keep` becomes NA
  mutate(party_10 = fct_other(party_10,
                              keep = c("Suomen Sosialidemokraattinen Puolue (SDP)",
                                       "Perussuomalaiset (PS)",
                                       "Kansallinen Kokoomus (KOK)",
                                       "Suomen Keskusta (KESK)",
                                       "Vihreä liitto (VIHR)",
                                       "Vasemmistoliitto (VAS)",
                                       "Suomen ruotsalainen kansanpuolue (RKP)",
                                       "Suomen Kristillisdemokraatit (KD)",
                                       "Liike Nyt (LIIK)",
                                       "Muu"),
                              other_level = NA_character_)) %>%
  # Replace the full party names with their abbreviations
  mutate(party_10 = fct_recode(party_10,
                               "SDP" = "Suomen Sosialidemokraattinen Puolue (SDP)",
                               "PS" = "Perussuomalaiset (PS)",
                               "KOK" = "Kansallinen Kokoomus (KOK)",
                               "KESK" = "Suomen Keskusta (KESK)",
                               "VIHR" = "Vihreä liitto (VIHR)",
                               "VAS" = "Vasemmistoliitto (VAS)",
                               "RKP" = "Suomen ruotsalainen kansanpuolue (RKP)",
                               "KD" = "Suomen Kristillisdemokraatit (KD)",
                               "LIIK" = "Liike Nyt (LIIK)",
                               "Other" = "Muu")) %>%
  # Fix the level order
  mutate(party_10 = factor(party_10, levels = c("KOK", "PS", "SDP", "KESK", "VAS", "VIHR", "RKP", "KD", "LIIK", "Other"))) %>%
  # Categorize "other" smaller parties as liberal or illiberal
  # Empty free-text answers become NA before the factor is built
  mutate(other_party = na_if(PCHOICE2023_OTHER, "")) %>%
  mutate(other_party = factor(other_party)) %>%
  mutate(other_party = droplevels(other_party)) %>%
  # NOTE(review): the level positions below depend on the sorted free-text
  # answers present in this data release. Position 5 is left uncollapsed -
  # presumably "EN MUISTA", which is set to NA in the next step. Re-check the
  # positions whenever the input data change.
  mutate(other_party_type = fct_collapse(other_party,
                             "Liberal" = levels(other_party)[1:4],
                             "Liberal" = levels(other_party)[6:9],
                             "Illiberal" = levels(other_party)[10:11],
                             "Liberal" = levels(other_party)[12:16],
                             "Illiberal" = levels(other_party)[17:18])) %>%
  # VKK is a listed answer option in PCHOICE2023 and is coded as illiberal
  mutate(other_party_type = case_when(other_party_type == "EN MUISTA" ~ NA_character_,
                                      PCHOICE2023 == "Valta Kuuluu Kansalle (VKK)" ~ "Illiberal",
                                      TRUE ~ other_party_type)) %>%
  # Categorize all parties as liberal or illiberal
  # The small-party classification is checked first; rows matching no
  # condition (e.g. "Other" without a classification) end up NA
  mutate(party_2 = case_when(other_party_type == "Liberal" ~ "Liberal",
                             other_party_type == "Illiberal" ~ "Illiberal",
                             party_10 == "PS" ~ "Illiberal",
                             is.na(party_10) ~ NA_character_,
                             party_10 == "SDP" ~ "Liberal",
                             party_10 == "KOK" ~ "Liberal",
                             party_10 == "KESK" ~ "Liberal",
                             party_10 == "VIHR" ~ "Liberal",
                             party_10 == "VAS" ~ "Liberal",
                             party_10 == "RKP" ~ "Liberal",
                             party_10 == "KD" ~ "Liberal",
                             party_10 == "LIIK" ~ "Liberal")) %>%
  mutate(party_2 = factor(party_2, levels = c("Liberal", "Illiberal"))) %>%
  # Categorize liberal or illiberal - Only with parliament representation
  mutate(party_2_parl = case_when(
                             party_10 == "PS" ~ "Illiberal",
                             party_10 == "SDP" ~ "Liberal",
                             party_10 == "KOK" ~ "Liberal",
                             party_10 == "KESK" ~ "Liberal",
                             party_10 == "VIHR" ~ "Liberal",
                             party_10 == "VAS" ~ "Liberal",
                             party_10 == "RKP" ~ "Liberal",
                             party_10 == "KD" ~ "Liberal",
                             party_10 == "LIIK" ~ "Liberal",
                             TRUE ~ NA_character_)) %>%
  mutate(party_2_parl = factor(party_2_parl, levels = c("Liberal", "Illiberal"))) %>%
  ## Create political experience
  # Is or has been member in political party
  mutate(part_memb = case_when(PARTYMEMB == "Olen" ~ 1,
                               PARTYMEMB == "En ole, mutta olen ollut aikaisemmin" ~ 1,
                               PARTYMEMB == "En ole koskaan ollut minkään poliittisen puolueen jäsen" ~ 0)) %>%
  # Has conducted some type of political behavior (in the last 4 years)
  mutate(contact_pol = K6_1, # Contacted a politician
         engage_party = K6_3, # Been engaged in a political party
         engage_ngo = K6_4, # Been engaged in a NGO
         protest = K6_8, # Participated in a peaceful demonstration
         civ_disob = K6_9) %>% # Participated in civil disobedience
  # 1 if has done, otherwise 0; any other answer becomes NA
  mutate(across(contact_pol:civ_disob, ~ case_when(
    .x == "Olen tehnyt viimeksi kuluneiden neljän vuoden aikana" ~ 1,
    .x == "En ole tehnyt, mutta saattaisin tehdä" ~ 0,
    .x == "En tekisi missään tapauksessa" ~ 0,
    .x == "En osaa sanoa" ~ 0))) %>%
  # Create mean variables of political experience (higher = higher).
  # pol_exp_1 uses all six items; pol_exp_2 to pol_exp_6 are alternative
  # versions that leave out different activities. Each mean is taken over the
  # items a respondent answered.
  mutate(
    pol_exp_1 = rowMeans(select(.,
                                part_memb, contact_pol, engage_party,
                                engage_ngo, protest, civ_disob), na.rm = TRUE),
    pol_exp_2 = rowMeans(select(.,
                                part_memb, engage_party,
                                engage_ngo, protest, civ_disob), na.rm = TRUE),
    pol_exp_3 = rowMeans(select(.,
                                engage_party,
                                engage_ngo, protest, civ_disob), na.rm = TRUE),
    pol_exp_4 = rowMeans(select(.,
                                contact_pol, engage_party,
                                engage_ngo, protest, civ_disob), na.rm = TRUE),
    pol_exp_5 = rowMeans(select(.,
                                part_memb, engage_party, engage_ngo), na.rm = TRUE),
    pol_exp_6 = rowMeans(select(.,
                                engage_party, engage_ngo), na.rm = TRUE)) %>%
  # rowMeans(na.rm = TRUE) gives NaN when every item is missing -> make it NA
  mutate(across(pol_exp_1:pol_exp_6,
                ~ case_when(is.nan(.x) ~ NA_real_, TRUE ~ .x))) %>%
  # Self-placement left-right ideology, numeric and categorical
  # Factor codes 12 and 13 (1.1 and 1.2 on the rescaled metric) are the
  # don't-know answers. They are removed on the integer codes so that no
  # floating-point equality test is needed.
  mutate(left_right = case_when(
    as.numeric(LEFTRIGHT_SELF) %in% c(12, 13) ~ NA_real_,
    TRUE ~ (as.numeric(LEFTRIGHT_SELF) - 1) / 10)) %>%
  mutate(left_right_cat = case_when(
    left_right <= 0.3 ~ "Left",
    left_right >= 0.4 & left_right <= 0.6 ~ "Moderate",
    left_right >= 0.7 ~ "Right",
    TRUE ~ NA_character_)) %>%
  mutate(left_right_cat = factor(left_right_cat, levels = c("Left", "Moderate", "Right"))) %>%
  # Self-placement liberal-conservative, numeric and categorical
  # Don't knows (factor codes 12 and 13) are removed as above
  mutate(lib_con = case_when(
    as.numeric(LIBCONS_SELF) %in% c(12, 13) ~ NA_real_,
    TRUE ~ (as.numeric(LIBCONS_SELF) - 1) / 10)) %>%
  mutate(lib_con_cat = case_when(
    lib_con <= 0.3 ~ "Liberal",
    lib_con >= 0.4 & lib_con <= 0.6 ~ "Moderate",
    lib_con >= 0.7 ~ "Conservative",
    TRUE ~ NA_character_)) %>%
  mutate(lib_con_cat = factor(lib_con_cat, levels = c("Liberal", "Moderate", "Conservative"))) %>%
  # Recode general satisfaction with democracy into numeric
  # (0 = not at all satisfied, 1 = very satisfied; don't knows are NA)
  mutate(sat_dem = case_when(DEMSATISF == "En osaa sanoa" ~ NA_character_,
                             TRUE ~ DEMSATISF)) %>%
  mutate(sat_dem = factor(sat_dem, levels = c("En lainkaan tyytyväinen", "En kovinkaan tyytyväinen",
                                              "Melko tyytyväinen", "Erittäin tyytyväinen"))) %>%
  mutate(sat_dem = (as.numeric(sat_dem) - 1) / 3) %>%
  # Categorical satisfaction with democracy in English
  mutate(sat_dem_cat = case_when(DEMSATISF == "En osaa sanoa" ~ NA_character_,
                                 TRUE ~ DEMSATISF)) %>%
  mutate(sat_dem_cat = recode(sat_dem_cat,
                              "Erittäin tyytyväinen" = "Very satisfied",
                              "Melko tyytyväinen" = "Fairly satisfied",
                              "En kovinkaan tyytyväinen" = "Not particularly satisfied",
                              "En lainkaan tyytyväinen" = "Not at all satisfied")) %>%
  mutate(sat_dem_cat = factor(sat_dem_cat, levels = c("Very satisfied",
                                                      "Fairly satisfied",
                                                      "Not particularly satisfied",
                                                      "Not at all satisfied"))) %>%
  # Start recoding items used for illiberalism index (Very bad proposal - Very good proposal)
  mutate(
    multiculture = FINVALUES1, # A multicultural Finland, tolerant of people from other countries
    immigration = FINVALUES10, # A Finland where immigration is more widespread
    women_rights = FINVALUES4, # A Finland that promotes gender equality more strongly than before
    gay_rights = FINVALUES6, # A Finland that strengthens the rights of gender and sexual minorities
    christian_values = FINVALUES2, # A Finland where Christian values play a greater role
    eu_influence = FINVALUES11) %>% # A Finland less committed to the European Union
  # Make them numeric
  # NOTE(review): factor() drops unused levels, so the integer codes assume
  # that every response option was chosen at least once - verify against the
  # codebook.
  mutate(across(multiculture:eu_influence, ~ as.numeric(factor(.x)))) %>%
  # Reverse these items
  mutate(across(multiculture:gay_rights, ~ 1 - (.x - 1) / 10)) %>%
  mutate(across(christian_values:eu_influence, ~ (.x - 1) / 10)) %>%
  # Create an index (0-1) - Higher = more illiberal
  mutate(illiberalism = rowMeans(select(.,
                                        multiculture, immigration,
                                        women_rights, gay_rights,
                                        christian_values, eu_influence), na.rm = TRUE)) %>%
  # NaN (no item answered) becomes NA
  mutate(illiberalism = case_when(is.nan(illiberalism) ~ NA_real_,
                                  TRUE ~ illiberalism)) %>%
  # Clean up some demographics
  mutate(age = 2023 - BYEAR) %>% # Age
  mutate(agegrp4 = cut(age, # Age categories
                       breaks = c(17, 34, 49, 64, Inf),
                       labels = c("18-34", "35-49", "50-64", "65-"),
                       right = TRUE)) %>%
  mutate(gender = case_when( # Gender dichotomous, any other answer is NA
    GENDER == "Mies" ~ "Man",
    GENDER == "Nainen" ~ "Woman",
    TRUE ~ NA_character_)) %>%
  mutate(gender = factor(gender, levels = c("Man", "Woman"))) %>%
  mutate(edu3 = fct_collapse(EDU, # Educational attainment, 3 levels
                             "Low" = levels(EDU)[1:3],
                             "Medium" = levels(EDU)[4:7],
                             "High" = levels(EDU)[8:12])) %>%
  # Refusals and don't knows are set to NA
  mutate(edu3 = case_when(edu3 == "En halua sanoa" ~ NA_character_,
                          edu3 == "En osaa sanoa" ~ NA_character_,
                          TRUE ~ edu3)) %>%
  mutate(edu3 = factor(edu3, levels = c("Low", "Medium", "High"))) %>%
  mutate(edu2 = fct_collapse(edu3, # Educational attainment, 2 levels
                             "No degree" = c("Low", "Medium"),
                             "Degree" = "High")) %>%
  # Size of the place where they live - higher = higher population density
  mutate(pop_dens = case_when(
    MUNSIZE == "Suuren kaupungin keskustassa (yli 100 000 asukasta)" ~ 1,
    MUNSIZE == "Pienemmän kaupungin keskustassa (alle 100 000 asukasta)" ~ 0.75,
    MUNSIZE == "Esikaupunkialueella tai kaupunkilähiössä" ~ 0.5,
    MUNSIZE == "Kuntakeskuksessa tai muussa taajamassa" ~ 0.25,
    MUNSIZE == "Maaseudun haja-asutusalueella" ~ 0,
    MUNSIZE %in% c("En halua sanoa", "En osaa sanoa") ~ NA_real_,
    TRUE ~ NA_real_)) %>%
  # Domicil type (English labels); levels not listed below become NA
  mutate(domicil = recode(MUNSIZE,
    "Suuren kaupungin keskustassa (yli 100 000 asukasta)" = "In the center of a big city (over 100 000 inhabitants)",
    "Pienemmän kaupungin keskustassa (alle 100 000 asukasta)" = "In the center of a smaller city (under 100 000 inhabitants)",
    "Esikaupunkialueella tai kaupunkilähiössä" = "In a suburb or close to a city",
    "Kuntakeskuksessa tai muussa taajamassa" = "In a municipal centre or other populaiton centre",
    "Maaseudun haja-asutusalueella" = "In a rural area")) %>%
  mutate(domicil = factor(domicil, levels = c(
    "In the center of a big city (over 100 000 inhabitants)",
    "In the center of a smaller city (under 100 000 inhabitants)",
    "In a suburb or close to a city",
    "In a municipal centre or other populaiton centre",
    "In a rural area"
  )))

## Testing Cronbach's alpha of different conceptualizations of political experience
# Each item set is reduced to complete cases before alpha is computed.
# psych::alpha() is namespace-qualified because ggplot2 (attached by
# tidyverse) also exports an alpha(); a bare call would depend on the order of
# the library() calls.
# Nr 1 - all six items
index <- cit %>%
  select(part_memb, contact_pol, engage_party,
         engage_ngo, protest, civ_disob) %>%
  na.omit()
psych::alpha(index, check.keys = TRUE)

# Nr 2 - without contacting a politician
index <- cit %>%
  select(part_memb, engage_party,
         engage_ngo, protest, civ_disob) %>%
  na.omit()
psych::alpha(index, check.keys = TRUE)

# Nr 3 - without party membership and contacting a politician
index <- cit %>%
  select(engage_party,
         engage_ngo, protest, civ_disob) %>%
  na.omit()
psych::alpha(index, check.keys = TRUE)

# Nr 4 - without party membership
index <- cit %>%
  select(contact_pol, engage_party,
         engage_ngo, protest, civ_disob) %>%
  na.omit()
psych::alpha(index, check.keys = TRUE)

# Nr 5 - party membership, party engagement and NGO engagement
index <- cit %>%
  select(part_memb, engage_party, engage_ngo) %>%
  na.omit()
psych::alpha(index, check.keys = TRUE)

# Nr 6 - party engagement and NGO engagement only
index <- cit %>%
  select(engage_party, engage_ngo) %>%
  na.omit()
psych::alpha(index, check.keys = TRUE)

# Check coding - Then remove unnecessary variables
# Keeps both weights, the two party classifications and every column from
# pol_exp_1 through domicil; all other columns are dropped.
cit <- select(cit,
              rake_weights2, rake_weights,
              party_10, party_2,
              pol_exp_1:domicil)

# Save cleaned file (file name is prefixed with today's date)
write_sav(cit, paste0(Sys.Date(), "cit_clean.sav"))


#### Cleaning politicians dataset ####
# These data are not publicly available. Contact the Finnish National Election Consortium
# to request access: https://www.vaalitutkimus.fi/en/the-election-study-consortium/
# Build the cleaned candidate (politician) data set: derive the party,
# political-experience, elected-status, ideology, satisfaction, illiberalism
# and demographic variables.
pol <- read_sav("ehdokaskysely2023_thomas_kim_Oct2024.sav") %>%
  # Turn every labelled column into a factor so values can be matched by label
  mutate(across(where(is.labelled), ~ as_factor(.x))) %>%
  # Recode party affiliation in the parliamentary election 2023
  # Levels 10 to 25 of A2 are merged into "Muu" (other)
  mutate(party_10 = fct_collapse(A2,
                                 "Muu" = levels(A2)[10:25])) %>%
  mutate(party_10 = fct_recode(party_10,
                               "KOK" = "Kansallinen Kokoomus",
                               "PS" = "Perussuomalaiset",
                               "SDP" = "Suomen Sosialidemokraattinen Puolue",
                               "KESK" = "Suomen Keskusta",
                               "VAS" = "Vasemmistoliitto",
                               "VIHR" = "Vihreä liitto",
                               "RKP" = "Suomen ruotsalainen kansanpuolue",
                               "KD" = "Suomen Kristillisdemokraatit",
                               "LIIK" = "Liike Nyt",
                               "Other" = "Muu")) %>%
  # Categorize parties as either liberal or illiberal
  # NOTE(review): the liberal group is partly given by level position in A2;
  # re-check the positions if the input data change.
  mutate(party_2 = fct_collapse(A2,
                                 "Liberal" = c("Kansallinen Kokoomus",
                                               levels(A2)[3:9],
                                               levels(A2)[11:15],
                                               levels(A2)[20:21]),
                                 "Illiberal" = c("Perussuomalaiset",
                                                 "Valta kuuluu kansalle",
                                                 "Kansalaisliitto",
                                                 "Korjausliike",
                                                 "Suomen Kansa Ensin",
                                                 "Kristallipuolue",
                                                 "Vapauden Liitto",
                                                 "Sinimusta Liike"))) %>%
  # The "Valitsijayhdistys, mikä?" level is set to NA, then dropped
  mutate(party_2 = fct_recode(party_2, NULL = "Valitsijayhdistys, mikä?")) %>%
  mutate(party_2 = fct_drop(party_2)) %>%
  # Categorize liberal or illiberal - Only with parliament representation
  mutate(party_2_parl = case_when(
    party_10 == "PS" ~ "Illiberal",
    party_10 == "SDP" ~ "Liberal",
    party_10 == "KOK" ~ "Liberal",
    party_10 == "KESK" ~ "Liberal",
    party_10 == "VIHR" ~ "Liberal",
    party_10 == "VAS" ~ "Liberal",
    party_10 == "RKP" ~ "Liberal",
    party_10 == "KD" ~ "Liberal",
    party_10 == "LIIK" ~ "Liberal",
    TRUE ~ NA_character_)) %>%
  mutate(party_2_parl = factor(party_2_parl, levels = c("Liberal", "Illiberal"))) %>%
  ## Political experience: share of elections in which the candidate was elected
  mutate(parl23 = A5_1, # Parliamentary election 2023
         parl19 = A5_2, # Parliamentary election 2019
         parl15 = A5_3, # Parliamentary election 2015
         bef_parl15 = A5_4, # Before the parliamentary election 2015
         mun21 = A6_1, # Municipal election 2021
         mun17 = A6_2, # Municipal election 2017
         mun12 = A6_3, # Municipal election 2012
         bef_mun12 = A6_4, # Before the municipal election 2012
         welf22 = A7) %>%  # The welfare area election 2022
  # Any answer other than the three below becomes NA
  mutate(across(parl23:welf22, ~ case_when(
    .x == "Olin ehdolla ja tulin valituksi" ~ 1, # 1 if ran and was elected
    .x == "Olin ehdolla, mutta en tullut valituksi" ~ 0, # 0 if ran but wasn't elected
    .x == "En ollut ehdolla" ~ 0))) %>%  # 0 if didn't run
  # Create a mean variable of political experience (higher = higher)
  mutate(pol_exp = rowMeans(select(.,
                                   parl23, parl19, parl15, bef_parl15,
                                   mun21, mun17, mun12, bef_mun12, welf22), na.rm = TRUE)) %>%
  # NaN (no item answered) becomes NA
  mutate(pol_exp = case_when(is.nan(pol_exp) ~ NA_real_, TRUE ~ pol_exp)) %>%
  ## Political experience where we account for levels of government
  mutate(parl23lvl = A5_1, # Parliamentary election 2023
         parl19lvl = A5_2, # Parliamentary election 2019
         parl15lvl = A5_3, # Parliamentary election 2015
         bef_parl15lvl = A5_4, # Before the parliamentary election 2015
         mun21lvl = A6_1, # Municipal election 2021
         mun17lvl = A6_2, # Municipal election 2017
         mun12lvl = A6_3, # Municipal election 2012
         bef_mun12lvl = A6_4, # Before the municipal election 2012
         welf22lvl = A7) %>%  # The welfare area election 2022
  # Parliament elected = 1
  mutate(across(parl23lvl:bef_parl15lvl, ~ case_when(
    .x == "Olin ehdolla ja tulin valituksi" ~ 1, # 1 if ran and was elected
    .x == "Olin ehdolla, mutta en tullut valituksi" ~ 0, # 0 if ran but wasn't elected
    .x == "En ollut ehdolla" ~ 0))) %>%  # 0 if didn't run
  # Welfare area elected = 0.67
  mutate(welf22lvl = case_when(
    welf22lvl == "Olin ehdolla ja tulin valituksi" ~ 0.67, # 0.67 if ran and was elected
    welf22lvl == "Olin ehdolla, mutta en tullut valituksi" ~ 0, # 0 if ran but wasn't elected
    welf22lvl == "En ollut ehdolla" ~ 0)) %>%  # 0 if didn't run
  # Municipality elected = 0.33
  mutate(across(mun21lvl:bef_mun12lvl, ~ case_when(
    .x == "Olin ehdolla ja tulin valituksi" ~ 0.33, # 0.33 if ran and was elected
    .x == "Olin ehdolla, mutta en tullut valituksi" ~ 0, # 0 if ran but wasn't elected
    .x == "En ollut ehdolla" ~ 0))) %>%  # 0 if didn't run
  # Create a weighted sum of political experience (higher = higher)
  mutate(pol_exp_lvl = rowSums(select(.,
                                      parl23lvl, parl19lvl, parl15lvl, bef_parl15lvl,
                                      mun21lvl, mun17lvl, mun12lvl, bef_mun12lvl, welf22lvl), na.rm = TRUE)) %>%
  # rowSums(na.rm = TRUE) returns 0 for candidates who answered none of the
  # nine items. pol_exp is built from the same answers and is NA for exactly
  # those rows, so use it to keep them missing instead of coding them as
  # "no experience".
  mutate(pol_exp_lvl = if_else(is.na(pol_exp), NA_real_, pol_exp_lvl)) %>%
  # Normalize 0-1
  mutate(pol_exp_lvl = normalize(pol_exp_lvl)) %>%
  # Code elected if an elected politician at some level, either before or after the election
  mutate(elected = if_else(Europarlamentaarikko == "1" | # Member of European Parliament
                             Kansanedustaja == "1" | # Member of Finnish parliament
                             Kunnanvaltuutettu == "1" | # Member of municipal council
                             Aluevaltuutettu == "1", # Member of welfare area council
                           "Elected", "Not elected")) %>%
  # Self-report elected -23 overrides the value above. case_when() treats a
  # missing A5_1 as "no match", so candidates who skipped the question keep
  # their existing value instead of being set to NA.
  mutate(elected = case_when(
    A5_1 == "Olin ehdolla ja tulin valituksi" ~ "Elected",
    TRUE ~ elected)) %>%
  mutate(elected = factor(elected, levels = c("Elected", "Not elected"))) %>%
  # Self-placement left-right ideology, numeric and categorical
  mutate(left_right = (as.numeric(C4) - 1) / 10) %>%
  mutate(left_right_cat = case_when(
    left_right <= 0.3 ~ "Left",
    left_right >= 0.4 & left_right <= 0.6 ~ "Moderate",
    left_right >= 0.7 ~ "Right",
    TRUE ~ NA_character_)) %>%
  mutate(left_right_cat = factor(left_right_cat, levels = c("Left", "Moderate", "Right"))) %>%
  # Self-placement liberal-conservative, numeric and categorical
  mutate(lib_con = (as.numeric(C5) - 1) / 10) %>%
  mutate(lib_con_cat = case_when(
    lib_con <= 0.3 ~ "Liberal",
    lib_con >= 0.4 & lib_con <= 0.6 ~ "Moderate",
    lib_con >= 0.7 ~ "Conservative",
    TRUE ~ NA_character_)) %>%
  mutate(lib_con_cat = factor(lib_con_cat, levels = c("Liberal", "Moderate", "Conservative"))) %>%
  # Recode general satisfaction with democracy into numeric (higher = more satisfied)
  # The four answers are moved to the front of the level order, least
  # satisfied first, before the codes are rescaled
  mutate(sat_dem = fct_relevel(D1,
                               "En lainkaan tyytyväinen",
                               "En erityisen tyytyväinen",
                               "Melko tyytyväinen",
                               "Erittäin tyytyväinen")) %>%
  mutate(sat_dem = (as.numeric(sat_dem) - 1) / 3) %>%
  # Categorical satisfaction with democracy in English
  mutate(sat_dem_cat = recode(D1,
                     "Erittäin tyytyväinen" = "Very satisfied",
                     "Melko tyytyväinen" = "Fairly satisfied",
                     "En erityisen tyytyväinen" = "Not particularly satisfied",
                     "En lainkaan tyytyväinen" = "Not at all satisfied")) %>%
  mutate(sat_dem_cat = factor(sat_dem_cat, levels = c("Very satisfied",
                                                      "Fairly satisfied",
                                                      "Not particularly satisfied",
                                                      "Not at all satisfied"))) %>%
  # Start recoding items used for illiberalism index (Very bad proposal - Very good proposal)
  mutate(
    multiculture = C2_1, # A multicultural Finland, tolerant of people from other countries
    immigration = C2_10, # A Finland where immigration is more widespread
    women_rights = C2_4, # A Finland that promotes gender equality more strongly than before
    gay_rights = C2_6, # A Finland that strengthens the rights of gender and sexual minorities
    christian_values = C2_2, # A Finland where Christian values play a greater role
    eu_influence = C2_11) %>% # A Finland less committed to the European Union
  # Make them numeric
  # NOTE(review): factor() drops unused levels, so the integer codes assume
  # that every response option was chosen at least once - verify against the
  # codebook.
  mutate(across(multiculture:eu_influence, ~ as.numeric(factor(.x)))) %>%
  # Reverse these items
  mutate(across(multiculture:gay_rights, ~ 1 - (.x - 1) / 10)) %>%
  mutate(across(christian_values:eu_influence, ~ (.x - 1) / 10)) %>%
  # Create an index (0-1) - Higher = more illiberal
  mutate(illiberalism = rowMeans(select(.,
                                        multiculture, immigration,
                                        women_rights, gay_rights,
                                        christian_values, eu_influence), na.rm = TRUE)) %>%
  # NaN (no item answered) becomes NA
  mutate(illiberalism = case_when(is.nan(illiberalism) ~ NA_real_,
                                  TRUE ~ illiberalism)) %>%
  # Clean up some demographics
  # Use registry data where possible, otherwise supplement with self-report
  mutate(age = Ikävaalipäivänä) %>% # Age
  mutate(self_age = 2023 - E2) %>%
  mutate(age = case_when(
    is.na(age) ~ self_age,
    TRUE ~ age)) %>%
  mutate(agegrp4 = cut(age, # Age categories
                       breaks = c(17, 34, 49, 64, Inf),
                       labels = c("18-34", "35-49", "50-64", "65-"),
                       right = TRUE)) %>%
  mutate(gender = case_when( # Gender
    Sukupuoli == 1 ~ "Mies",
    Sukupuoli == 2 ~ "Nainen",
    TRUE ~ as.character(Sukupuoli))) %>%
  # Fill in missing gender from the self-reported E1
  mutate(gender = case_when(
    is.na(gender) & E1 == "Nainen" ~ "Nainen",
    is.na(gender) & E1 == "Mies" ~ "Mies",
    TRUE ~ gender)) %>%
  mutate(gender = na_if(gender, "")) %>%
  # Translate to English; any other value becomes NA
  mutate(gender = case_when(
    gender == "Mies" ~ "Man",
    gender == "Nainen" ~ "Woman")) %>%
  mutate(gender = factor(gender, levels = c("Man", "Woman"))) %>%
  mutate(edu3 = fct_collapse(E7, # Educational attainment, 3 levels
                             "Low" = levels(E7)[1:3],
                             "Medium" = levels(E7)[4:7],
                             "High" = levels(E7)[8:12])) %>%
  mutate(edu2 = fct_collapse(edu3, # Educational attainment, 2 levels
                             "No degree" = c("Low", "Medium"),
                             "Degree" = "High")) %>%
  # Size of the place where they live - higher = higher population density
  mutate(pop_dens = case_when(
    E6 == "Suuren kaupungin keskustassa (yli 100 000 asukasta)" ~ 1,
    E6 == "Pienemmän kaupungin keskustassa (alle 100 000 asukasta)" ~ 0.75,
    E6 == "Esikaupunkialueella tai kaupunkilähiössä" ~ 0.5,
    E6 == "Kuntakeskuksessa tai muussa taajamassa" ~ 0.25,
    E6 == "Maaseudun haja-asutusalueella" ~ 0,
    TRUE ~ NA_real_)) %>%
  # Domicil type (English labels); levels not listed below become NA
  mutate(domicil = recode(E6,
    "Suuren kaupungin keskustassa (yli 100 000 asukasta)" = "In the center of a big city (over 100 000 inhabitants)",
    "Pienemmän kaupungin keskustassa (alle 100 000 asukasta)" = "In the center of a smaller city (under 100 000 inhabitants)",
    "Esikaupunkialueella tai kaupunkilähiössä" = "In a suburb or close to a city",
    "Kuntakeskuksessa tai muussa taajamassa" = "In a municipal centre or other populaiton centre",
    "Maaseudun haja-asutusalueella" = "In a rural area")) %>%
  mutate(domicil = factor(domicil, levels = c(
    "In the center of a big city (over 100 000 inhabitants)",
    "In the center of a smaller city (under 100 000 inhabitants)",
    "In a suburb or close to a city",
    "In a municipal centre or other populaiton centre",
    "In a rural area"
  )))

# Check coding - Then remove unnecessary variables
# Keeps the weight, the columns from party_10 through party_2 and everything
# from pol_exp through domicil, minus the helper column self_age.
pol <- select(pol,
              rake_weights,
              party_10:party_2,
              pol_exp:domicil,
              -self_age)

# Save cleaned file (file name is prefixed with today's date)
write_sav(pol, paste0(Sys.Date(), "pol_clean.sav"))


